Exploratory Visualizations
Wind Speed 1873
wind_speed_1873 <- journal_1873 %>%
select(date_mdy,
month,
wind_speed_am,
wind_speed_pm,
wind_speed_night) %>%
mutate(year = "1873") %>%
pivot_longer(cols = starts_with("wind_speed"),
names_to = "period",
values_to = "wind_speed") %>%
mutate(period = case_when(
period == "wind_speed_am" ~ "am",
period == "wind_speed_pm" ~ "pm",
period == "wind_speed_night" ~ "night")) %>%
separate_rows(wind_speed, sep = ",") %>%
mutate(category = case_when(
wind_speed %in% c("blustering", "very blustering") ~ "blustering",
wind_speed %in% c("breezy", "good breeze", "very fresh breeze", "breezed up", "fresh breeze", "heavy breeze", "smart breeze", "strong breeze") ~ "breeze",
wind_speed %in% c("blowing very heavy", "fresh blow", "heavy blow", "very heavy blow", "blowy", "blowing", "blowing hard", "blowing very hard") ~ "blow",
wind_speed %in% c("fresh gale", "gale") ~ "gale",
wind_speed %in% c("moderate", "quite moderate", "very moderate") ~ "moderate",
wind_speed %in% c("pleasant", "quite pleasant", "very pleasant") ~ "pleasant",
wind_speed %in% c("calm", "perfectly calm") ~ "calm",
wind_speed %in% c("scant wind", "heavy winds") ~ "wind",
wind_speed %in% c("rough", "squall", "strong", "very light", "heavy", "baffling", "a light air", "variable", "fair") ~ "other intensities"
))
wind_speed_1873 %>%
mutate(wind_speed = ifelse(wind_speed == "NA", NA, wind_speed)) %>%
drop_na(wind_speed, period, category) %>%
group_by(wind_speed, period, category) %>%
summarize(n = n()) %>%
mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
ggplot(aes(x = wind_speed,
y = n,
fill = category)) +
geom_bar(stat = "identity",
position = "dodge") +
labs(title = "Wind Vocabulary Frequency by Period of the Day 1873",
x = "Wind Speed Vocabulary",
y = "Frequency",
fill = "Category") +
theme_minimal() +
coord_flip() +
facet_wrap(~ period)
## `summarise()` has grouped output by 'wind_speed', 'period'. You can override
## using the `.groups` argument.

wind_speed_1873 %>%
mutate(wind_speed = ifelse(wind_speed == "NA", NA, wind_speed)) %>%
drop_na(wind_speed, period, category) %>%
group_by(wind_speed, period, category) %>%
summarize(n = n()) %>%
mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
ggplot(aes(x = wind_speed,
y = n,
fill = category)) +
geom_bar(stat = "identity",
position = "dodge") +
labs(title = "General Wind Vocabulary Frequency 1873",
x = "Wind Speed Vocabulary",
y = "Frequency",
fill = "Category") +
theme_minimal(base_size = 7) +
coord_polar() +
scale_y_log10() # to better see smaller frequencies
## `summarise()` has grouped output by 'wind_speed', 'period'. You can override
## using the `.groups` argument.

wind_speed_1873 %>%
mutate(wind_speed = ifelse(wind_speed == "NA", NA, wind_speed)) %>%
drop_na(wind_speed, period, category) %>%
group_by(wind_speed, period, category) %>%
summarize(n = n()) %>%
mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
ggplot(aes(x = "", y = n, fill = category)) +
geom_bar(stat = "identity", width = 1) + # 1 for pie chart effect
labs(x = NULL, y = "Frequency", fill = "Category") +
theme_minimal() +
coord_polar("y", start = 0) +
theme_void() +
labs(title = " 1873 Wind Vocabulary by Category")
## `summarise()` has grouped output by 'wind_speed', 'period'. You can override
## using the `.groups` argument.

Wind Speed Comparison 1873-1874
wind_speed_1874 <- journal_1874 %>%
select(date_mdy,
month,
wind_speed_am,
wind_speed_pm,
wind_speed_night) %>%
mutate(year = 1874) %>%
pivot_longer(cols = starts_with("wind_speed"),
names_to = "period",
values_to = "wind_speed") %>%
mutate(period = case_when(
period == "wind_speed_am" ~ "am",
period == "wind_speed_pm" ~ "pm",
period == "wind_speed_night" ~ "night")) %>%
separate_rows(wind_speed, sep = ",") %>%
mutate(category = case_when(
wind_speed %in% c("blustering", "very blustering") ~ "blustering",
wind_speed %in% c("breezy", "good breeze", "very fresh breeze", "breezed up", "fresh breeze", "heavy breeze", "smart breeze", "strong breeze") ~ "breeze",
wind_speed %in% c("blowing very heavy", "fresh blow", "heavy blow", "very heavy blow", "blowy", "blowing", "blowing hard", "blowing very hard") ~ "blow",
wind_speed %in% c("fresh gale", "gale") ~ "gale",
wind_speed %in% c("moderate", "quite moderate", "very moderate") ~ "moderate",
wind_speed %in% c("pleasant", "quite pleasant", "very pleasant") ~ "pleasant",
wind_speed %in% c("calm", "perfectly calm") ~ "calm",
wind_speed %in% c("scant wind", "heavy winds") ~ "wind",
wind_speed %in% c("rough", "squall", "strong", "very light", "heavy", "baffling", "a light air", "variable", "fair") ~ "other intensities"
))
combined_wind_speed<- rbind(wind_speed_1873, wind_speed_1874)
combined_wind_speed %>%
mutate(wind_speed = ifelse(wind_speed == "NA", NA, wind_speed)) %>%
drop_na(wind_speed, period, category, year) %>%
group_by(wind_speed, category, year) %>%
summarize(n = n()) %>%
ggplot(aes(x = wind_speed, y = n, fill = category)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Yearly Comparison of Wind Speed Vocabulary",
subtitle = "Colored by Overarching Categories in 1873 and 1874",
x = "Wind Speed Vocabulary",
y = "Frequency",
fill = "Category") +
theme_minimal() +
coord_flip() +
facet_wrap(~ year)
## `summarise()` has grouped output by 'wind_speed', 'category'. You can override
## using the `.groups` argument.

combined_wind_speed %>%
mutate(wind_speed = ifelse(wind_speed == "NA", NA, wind_speed)) %>%
drop_na(wind_speed, period, category, year) %>%
group_by(category, month, year) %>%
summarise(frequency = n()) %>%
ggplot(aes(x = category, y = frequency, fill = as.factor(year))) +
geom_bar(stat = "identity", position = "stack") +
labs(title = "Monthly Comparison of Wind Speed Categories 1873-1874",
x = "Category",
y = "Frequency",
fill = "Year") +
facet_wrap(~ month) +
coord_flip()
## `summarise()` has grouped output by 'category', 'month'. You can override using
## the `.groups` argument.

Wind Directions 1873
convert_wind_direction_to_degrees <- function(wind_direction) {
degrees <- case_when(
wind_direction == "N" ~ 0,
wind_direction == "NNE" ~ 22.5,
wind_direction == "NE" ~ 45,
wind_direction == "ENE" ~ 67.5,
wind_direction == "E" ~ 90,
wind_direction == "ESE" ~ 112.5,
wind_direction == "SE" ~ 135,
wind_direction == "SSE" ~ 157.5,
wind_direction == "S" ~ 180,
wind_direction == "SSW" ~ 202.5,
wind_direction == "SW" ~ 225,
wind_direction == "WSW" ~ 247.5,
wind_direction == "W" ~ 270,
wind_direction == "WNW" ~ 292.5,
wind_direction == "NW" ~ 315,
wind_direction == "NNW" ~ 337.5,
TRUE ~ NA_real_ # Return NA if the wind direction is not recognized
)
return(degrees)
}
wind_direction_1873_long <- journal_1873 %>%
pivot_longer(cols = starts_with("wind_direction"),
names_to = "period",
values_to = "wind_direction") %>%
separate_rows(wind_direction, sep = ", ") %>%
mutate(period = case_when(
period == "wind_direction_am" ~ "am",
period == "wind_direction_pm" ~ "pm",
period == "wind_direction_night" ~ "night"
))
wind_direction_1873_long <- wind_direction_1873_long %>%
mutate(
wind_degrees = case_when(
period == "am" ~ convert_wind_direction_to_degrees(wind_direction),
period == "pm" ~ convert_wind_direction_to_degrees(wind_direction),
period == "night" ~ convert_wind_direction_to_degrees(wind_direction)
)) %>%
select(date_mdy, month, wind_direction, period, wind_degrees) %>%
mutate(year = "1873")
# Function to convert degrees to radians for polar coordinates
to_radians <- function(degrees) {
return((degrees - 90) * pi / 180)
}
wind_direction_1873_long %>%
ggplot(aes(x = to_radians(wind_degrees))) +
geom_bar(aes(fill = stat(count)),
bins = 16,
color = "black") +
scale_fill_viridis_c(option = "plasma", name = "Frequency") +
geom_text(aes(x = to_radians(wind_degrees),
y = 150, label = wind_direction),
size = 4, fontface = "bold",
color = "black") +
labs(title = "Wind Directions Mentionned, 1873",
"Only Directions Explicitely Specified by Period",
x = NULL,
y = NULL) +
theme_minimal() +
theme(axis.text = element_blank()) +
coord_polar() +
scale_y_continuous(name = "Frequency",
trans = "log10") # Frequency scale (logarithmic)
## Warning in geom_bar(aes(fill = stat(count)), bins = 16, color = "black"):
## Ignoring unknown parameters: `bins`
## Warning: `stat(count)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 761 rows containing non-finite values (`stat_count()`).
## Warning: Removed 761 rows containing missing values (`geom_text()`).

wind_direction_1873_long %>%
mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
ggplot(aes(x = to_radians(wind_degrees))) +
geom_bar(aes(fill = stat(count)),
bins = 16,
color = "black") +
scale_fill_viridis_c(option = "plasma", name = "Frequency") +
geom_text(aes(x = to_radians(wind_degrees),
y = 150, label = wind_direction),
size = 4, fontface = "bold",
color = "black") +
labs(title = "Wind Directions Mentionned by Period of the Day, 1873",
x = NULL,
y = NULL) +
theme_minimal() +
theme(axis.text = element_blank()) +
coord_polar() +
scale_y_continuous(name = "Frequency",
trans = "log10") + # Frequency scale (logarithmic)
facet_wrap(~ period)
## Warning in geom_bar(aes(fill = stat(count)), bins = 16, color = "black"):
## Ignoring unknown parameters: `bins`
## Warning: Removed 761 rows containing non-finite values (`stat_count()`).
## Warning: Removed 761 rows containing missing values (`geom_text()`).

wind_direction_1873_long %>%
ggplot(aes(x = to_radians(wind_degrees))) +
geom_bar(aes(fill = stat(count)),
bins = 16,
color = "black") +
scale_fill_viridis_c(option = "plasma", name = "Frequency") +
geom_text(aes(x = to_radians(wind_degrees),
y = 150, label = wind_direction),
size = 4, fontface = "bold",
color = "black") +
labs(title = "Wind Directions Mentioned by Month, 1873",
x = NULL,
y = NULL) +
theme_minimal() +
theme(axis.text = element_blank()) +
coord_polar() +
scale_y_continuous(name = "Frequency",
trans = "log10") + # Frequency scale (logarithmic)
facet_wrap(~ month)
## Warning in geom_bar(aes(fill = stat(count)), bins = 16, color = "black"):
## Ignoring unknown parameters: `bins`
## Warning: Removed 761 rows containing non-finite values (`stat_count()`).
## Warning: Removed 761 rows containing missing values (`geom_text()`).

wind_direction_1873_long %>%
mutate(wind_direction = ifelse(wind_direction == "NA", NA, wind_direction)) %>%
mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
drop_na(wind_direction, period, month) %>%
ggplot(aes(x = wind_direction, fill = period)) +
geom_bar(position = "stack", color = "black") +
labs(title = "Bar Chart of Monthly Wind Directions Frequencies in 1873",
x = "Wind Direction",
y = "Frequency") +
coord_flip() +
facet_wrap(~ month, nrow = 2)

Wind Direction Comparison 1873-1874
wind_direction_1874_long <- journal_1874 %>%
pivot_longer(cols = starts_with("wind_direction"),
names_to = "period",
values_to = "wind_direction") %>%
separate_rows(wind_direction, sep = ", ") %>%
mutate(period = case_when(
period == "wind_direction_am" ~ "am",
period == "wind_direction_pm" ~ "pm",
period == "wind_direction_night" ~ "night"
))
wind_direction_1874_long <- wind_direction_1874_long %>%
mutate(
wind_degrees = case_when(
period == "am" ~ convert_wind_direction_to_degrees(wind_direction),
period == "pm" ~ convert_wind_direction_to_degrees(wind_direction),
period == "night" ~ convert_wind_direction_to_degrees(wind_direction)
)) %>%
select(date_mdy, month, wind_direction, period, wind_degrees) %>%
mutate(year = "1874")
combined_wind_direction <- rbind(wind_direction_1873_long, wind_direction_1874_long)
combined_wind_direction %>%
mutate(wind_direction = ifelse(wind_direction == "NA", NA, wind_direction)) %>%
drop_na(wind_direction, period, year) %>%
ggplot(aes(x = wind_direction, fill = year)) +
geom_bar(position = "stack", color = "black") +
labs(title = "Bar Chart of Yearly Wind Directions Frequencies",
x = "Wind Direction",
y = "Frequency") +
coord_flip()

Wind Direction and Wind Speed
combined_wind <- full_join(combined_wind_direction, combined_wind_speed, by = c("date_mdy", "month", "year", "period"))
## Warning in full_join(combined_wind_direction, combined_wind_speed, by = c("date_mdy", : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 94 of `x` matches multiple rows in `y`.
## ℹ Row 409 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
combined_wind <- combined_wind %>%
mutate(wind_direction = ifelse(wind_direction == "NA", NA, wind_direction)) %>%
mutate(wind_speed = ifelse(wind_speed == "NA", NA, wind_speed )) %>%
drop_na(wind_direction, wind_speed)
freq_combined_wind <- combined_wind %>%
group_by(wind_speed,
wind_direction) %>%
summarize(frequency = n())
## `summarise()` has grouped output by 'wind_speed'. You can override using the
## `.groups` argument.
freq_combined_wind %>%
ggplot(aes(x = wind_direction,
y = wind_speed,
fill = frequency)) +
geom_tile() +
scale_fill_viridis_c() +
labs(title = "Wind Speed and Wind Direction Heat Map",
x = "Wind Direction",
y = "Wind Speed",
fill = "Frequency") +
scale_fill_viridis_c(direction = -1) #for darker = higehr frequency
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

combined_wind %>%
group_by(wind_speed,
wind_direction,
year) %>%
summarize(frequency = n()) %>%
ggplot(aes(x = wind_direction,
y = wind_speed,
fill = frequency)) +
geom_tile() +
scale_fill_viridis_c() +
labs(title = "Wind Speed and Wind Direction Heat Map by Year",
x = "Wind Direction",
y = "Wind Speed",
fill = "Frequency") +
scale_fill_viridis_c(direction = -1) +
facet_wrap(~ year)
## `summarise()` has grouped output by 'wind_speed', 'wind_direction'. You can
## override using the `.groups` argument.
## Scale for fill is already present. Adding another scale for fill, which will
## replace the existing scale.

Weather Conditions
weather_con_1873_long <- journal_1873 %>%
select(date_mdy,
month,
weather_condition_am,
weather_condition_pm,
weather_condition_night) %>%
mutate(year = "1873") %>%
pivot_longer(cols = starts_with("weather_condition"),
names_to = "period",
values_to = "weather_condition") %>%
mutate(period = case_when(
period == "weather_condition_am" ~ "am",
period == "weather_condition_pm" ~ "pm",
period == "weather_condition_night" ~ "night")) %>%
separate_rows(weather_condition, sep = ",") %>%
mutate(category = case_when(
weather_condition %in% c("chilly", "cold", "cool", "extremely cold", "very cold", "quite cold") ~ "cold",
weather_condition %in% c("pleasant", "very pleasant") ~ "pleasant",
weather_condition %in% c("very warm", "warm", "hot") ~ "warm",
weather_condition %in% c("clear", "cleared up", "fine") ~ "clear",
weather_condition %in% c("overcast", "cloudy") ~ "cloud",
weather_condition %in% c( "pleasant", "quite pleasant", "very pleasant") ~ "pleasant",
weather_condition %in% c("calm", "perfectly calm") ~ "calm",
weather_condition %in% c("foggy", "foggy", "dense fog", "thick fog", "very foggy", "thick with fog", "very thick with fog") ~ "fog",
weather_condition %in% c("heavy showers", "very heavy shower", "shower", "showery", "a little showery") ~ "showers",
weather_condition %in% c("drizzle", "drizzling rain") ~ "drizzle",
weather_condition %in% c("cold rain", "heavy rain storm", "heavy rain", "fine rain", "raining", "rainy", "rain", "rain spells", "rain squall", "rain storm", "moderate rain", "big rain storm", "very heavy rain") ~ "rain",
weather_condition %in% c("little snow", "snow sleet", "light snow", "thick snow", "pleasant snow", "snowy", "snow", "snow spells", "snow squall", "big snow storm", "snow storm", "snowing", "snowing fast", "moderate snow") ~ "snow",
weather_condition %in% c("stormy", "tough storm", "very heavy storm", "moderate rainstorm", "heavy storm") ~ "storm",
weather_condition %in% c("thunder", "heavy thunder") ~ "thunder",
weather_condition %in% c("sharp lightning", "lightning") ~ "lightning",
weather_condition %in% c("misty", "good weather", "moderate weather", "sun out", "hail") ~ "other",
))
weather_con_1873_freq <- weather_con_1873_long %>%
mutate(weather_condition = ifelse(weather_condition == "NA", NA, weather_condition)) %>%
drop_na(weather_condition) %>%
count(weather_condition)
category_colors <- c(
"cold" = "#16324a",
"pleasant" = "#ccf146",
"warm" = "#b5a642",
"clear" = "#9467bd",
"cloud" = "#8c564b",
"calm" = "#2ca02c",
"fog" = "#e377c2",
"showers" = "#1f77b4",
"drizzle" = "#004f95",
"rain" = "#17becf",
"snow" = "#bbbbbb",
"storm" = "#ff7f0e",
"thunder" = "#d62728",
"lightning" = "#ff9896",
"other" = "#c5b0d5"
)
wordcloud(
weather_con_1873_freq$weather_condition,
weather_con_1873_freq$n,
colors = category_colors,
random.order = FALSE,
scale = c(5, 1),
min.freq = 1,
max.words = Inf
)

weather_con_1873_long %>%
mutate(weather_condition = ifelse(weather_condition == "NA", NA, weather_condition)) %>%
drop_na( weather_condition, period, category) %>%
group_by( weather_condition, period, category) %>%
summarize(n = n()) %>%
mutate(period = factor(period, levels = c("am", "pm", "night"))) %>%
ggplot(aes(x = weather_condition,
y = n,
fill = category)) +
geom_bar(stat = "identity",
position = "dodge") +
labs(title = "Weather Condition Frequency by Period of the Day 1873",
x = "Weather Condition Vocabulary",
y = "Frequency",
fill = "Category") +
theme_minimal() +
coord_flip() +
facet_wrap(~ period)
## `summarise()` has grouped output by 'weather_condition', 'period'. You can
## override using the `.groups` argument.

weather_con_1873_long %>%
mutate(weather_condition = ifelse(weather_condition == "NA", NA, weather_condition)) %>%
drop_na(weather_condition, category, month) %>%
group_by(month) %>%
count(category) %>%
ggplot(aes(x = factor(month, levels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")),
y = n,
fill = category )) +
geom_col(position = "stack") +
scale_fill_brewer(palette = "Spectral") +
labs(title = "Weather Frequency By Category per month",
fill = "Caterogy",
x = "Month",
y = "Number of occurrences") +
coord_polar()

weather_con_1874_long <- journal_1874 %>%
select(date_mdy,
month,
weather_condition_am,
weather_condition_pm,
weather_condition_night) %>%
mutate(year = "1874") %>%
pivot_longer(cols = starts_with("weather_condition"),
names_to = "period",
values_to = "weather_condition") %>%
mutate(period = case_when(
period == "weather_condition_am" ~ "am",
period == "weather_condition_pm" ~ "pm",
period == "weather_condition_night" ~ "night")) %>%
separate_rows(weather_condition, sep = ",") %>%
mutate(category = case_when(
weather_condition %in% c("chilly", "cold", "cool", "extremely cold", "very cold", "quite cold") ~ "cold",
weather_condition %in% c("pleasant", "very pleasant") ~ "pleasant",
weather_condition %in% c("very warm", "warm", "hot") ~ "warm",
weather_condition %in% c("clear", "cleared up", "fine") ~ "clear",
weather_condition %in% c("overcast", "cloudy") ~ "cloud",
weather_condition %in% c( "pleasant", "quite pleasant", "very pleasant") ~ "pleasant",
weather_condition %in% c("calm", "perfectly calm") ~ "calm",
weather_condition %in% c("foggy", "foggy", "dense fog", "thick fog", "very foggy", "thick with fog", "very thick with fog") ~ "fog",
weather_condition %in% c("heavy showers", "very heavy shower", "shower", "showery", "a little showery") ~ "showers",
weather_condition %in% c("drizzle", "drizzling rain") ~ "drizzle",
weather_condition %in% c("cold rain", "heavy rain storm", "heavy rain", "fine rain", "raining", "rainy", "rain", "rain spells", "rain squall", "rain storm", "moderate rain", "big rain storm", "very heavy rain") ~ "rain",
weather_condition %in% c("little snow", "snow sleet", "light snow", "thick snow", "pleasant snow", "snowy", "snow", "snow spells", "snow squall", "big snow storm", "snow storm", "snowing", "snowing fast", "moderate snow") ~ "snow",
weather_condition %in% c("stormy", "tough storm", "very heavy storm", "moderate rainstorm", "heavy storm") ~ "storm",
weather_condition %in% c("thunder", "heavy thunder") ~ "thunder",
weather_condition %in% c("sharp lightning", "lightning") ~ "lightning",
weather_condition %in% c("misty", "good weather", "moderate weather", "sun out", "hail") ~ "other",
))
weather_con <- rbind(weather_con_1873_long, weather_con_1874_long)
Wind Speed and Weather Conditions
#currently only have the 1873 weather
wind_weather <- full_join(combined_wind, weather_con, by = c("date_mdy", "month", "year", "period"))
## Warning in full_join(combined_wind, weather_con, by = c("date_mdy", "month", : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 104 of `x` matches multiple rows in `y`.
## ℹ Row 525 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
wind_weather <- wind_weather %>%
mutate(wind_direction = ifelse(wind_direction == "NA", NA, wind_direction)) %>%
mutate(wind_speed = ifelse(wind_speed == "NA", NA, wind_speed )) %>%
mutate(weather_condition = ifelse(weather_condition == "NA", NA, weather_condition )) %>%
drop_na(wind_direction, wind_speed, weather_condition)
freq_wind_weather <- wind_weather %>%
group_by(wind_speed,
weather_condition) %>%
summarize(frequency = n())
## `summarise()` has grouped output by 'wind_speed'. You can override using the
## `.groups` argument.
freq_wind_weather %>%
ggplot(aes(x = weather_condition,
y = wind_speed,
fill = frequency)) +
geom_tile() +
scale_fill_viridis_c() +
labs(title = "Wind Speed and Weather Conditions Heat Map",
x = "Weather Condition",
y = "Wind Speed",
fill = "Frequency") +
coord_flip() +
scale_fill_viridis_c(direction = -1) #for darker = higehr frequency
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

wind_weather %>%
group_by(wind_speed,
weather_condition,
year) %>%
summarize(frequency = n()) %>%
ggplot(aes(x = weather_condition,
y = wind_speed,
fill = frequency)) +
geom_tile() +
scale_fill_viridis_c() +
labs(title = "Wind Speed and Weather Conditions by Year Heat Map",
x = "Weather Condition",
y = "Wind Speed",
fill = "Frequency") +
facet_wrap(~ year, nrow = 2) +
coord_flip() +
scale_fill_viridis_c(direction = -1) #for darker = higehr frequency
## `summarise()` has grouped output by 'wind_speed', 'weather_condition'. You can
## override using the `.groups` argument.
## Scale for fill is already present. Adding another scale for fill, which will
## replace the existing scale.

Letters
journal_1873 <- journal_1873 %>%
mutate(letter = case_when(
letter == "read" ~ "received",
letter == "read and write" ~ "received and write",
TRUE ~ letter # Keep the original value if none of the conditions match
))
journal_1874 <- journal_1874 %>%
mutate(letter = case_when(
letter == "read" ~ "received",
letter == "read and write" ~ "received and write",
TRUE ~ letter # Keep the original value if none of the conditions match
))
letters_1873 <- journal_1873 %>%
select(date_mdy, month, journal_entry, letter, letter_from, letter_to, notes, ) %>%
mutate(year = "1873")
letters_1874 <- journal_1874 %>%
select(date_mdy, month, journal_entry, letter,letter_from, letter_to, notes) %>%
mutate(year = "1874")
combined_letters <- rbind(letters_1873, letters_1874)
combined_letters %>%
count(letter, year) %>%
mutate(letter = ifelse(letter == "NA", NA, letter)) %>%
drop_na(letter) %>%
ggplot(aes(x = letter,
y = n,
fill = year)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ year) +
labs(title = "Letter Communication",
subtitle = "For 1873 and 1874",
x = "Letter Status",
y = "Frequency",
fill = "Year")

combined_letters %>%
mutate(letter = ifelse(letter == "NA", NA, letter)) %>%
drop_na(letter) %>%
filter(letter != "no letter") %>%
count(letter, year) %>%
ggplot(aes(x = letter, y = n, fill = year)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ year) +
labs(title = "Frequency of Letter Communication Mentioned",
subtitle = "For 1873 and 1874",
x = "Letter Status",
y = "Frequency",
fill = "Year") +
scale_y_continuous(breaks = seq(5, 30, 5))

month_order <- c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")
combined_letters %>%
mutate(letter = ifelse(letter == "NA", NA, letter)) %>%
drop_na(letter) %>%
filter(letter != "no letter") %>%
mutate(month = factor(month, levels = month_order)) %>%
count(letter, year, month) %>%
ggplot(aes(x = letter, y = n, fill = year)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ year) +
labs(title = "Frequency of Letter Communication Mentioned per Month",
subtitle = "For 1873 and 1874",
x = "Letter Status",
y = "Frequency",
fill = "Year") +
facet_wrap(~ month)

Communication (Names & Letters)
#only rows with letters written or received
communication <- combined_letters %>%
mutate(letter = ifelse(letter == "NA", NA, letter)) %>%
drop_na(letter) %>%
filter(letter != "no letter") %>%
separate_rows(letter_from, sep = ", ") %>%
separate_rows(letter_to, sep = ", ") %>%
mutate(letter_from = case_when(
letter_from %in% c("Perlley and Russel", "Perley and Russell") ~ "Perley and Russell",
letter_from %in% c("G.L. Hodgkins", "G. L. Hodgkins", "G. L Hodgins") ~ "G. L. Hodgkins",
letter_from %in% c("Charles C. Burrill", "C.C. Burrill", "C. C. Burrill", "C.C Burrill", "C.C Burill") ~ "C. C. Burrill",
letter_from %in% c("Benj Kittridge", "Benj. Kittridge") ~ "Benj. Kittridge",
letter_from %in% c("Reg. of Deeds", "Reg of Deed") ~ "Reg of Deeds",
TRUE ~ letter_from)) %>%
mutate(letter_to = case_when(
letter_to %in% c("C. C. Burrill", "C.C Burrill") ~ "C. C. Burrill",
letter_to %in% c("Benj. Kittrige", "Benj. Kittridge") ~ "Benj. Kittridge",
TRUE ~ letter_to))
communication %>%
select(letter_from, month, year) %>%
count(letter_from, month, year) %>%
drop_na(letter_from) %>%
group_by(month, letter_from) %>%
mutate(last_word = str_extract_all(letter_from, "\\S+") %>% map_chr(tail, 1)) %>%
arrange(last_word) %>% # to arrange by alphabetical order based on last word
ggplot(aes(x = factor(letter_from, levels = unique(letter_from)),
y = n,
fill = month)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(title = "Frequency of Letters Received by Freeland Bunker",
subtitle = "In 1873 and 1874",
x = "People Sending the Letters",
y = "Frequency",
fill = "Month") +
scale_fill_discrete(labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) +
coord_flip() +
facet_wrap(~ year)

communication %>%
select(letter_to, month, year) %>%
count(letter_to, month, year) %>%
drop_na(letter_to) %>%
group_by(month, letter_to, year) %>%
mutate(last_word = str_extract_all(letter_to, "\\S+") %>% map_chr(tail, 1)) %>%
arrange(last_word) %>% #to arrange by alphabetical order
ggplot(aes(x = factor(letter_to, levels = unique(letter_to)),
y = n,
fill = month)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(title = "Frequency of Letters Written by Freeland Bunker",
subtitle = "In 1873 and 1874",
x = "Letter Recipients",
y = "Frequency",
fill = "Month") +
scale_fill_discrete(labels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")) +
coord_flip() +
facet_wrap(~ year)

Boats
#this is is from master_journal_lists as of 08/2023
boats <- c("Signal", "Amulet", "Black Warrior", "Sea Flower", "A.G. Brooks", "Henrietta", "Mermaid", "Harp", "Woodcock", "Roamer", "Virgin", "Neptune", "Banner", "Old Chad", "Sea Pigeon", "Elizabeth", "Wyoming", "Mary & Eliza", "Mars Hill", "Nauseag", "Washington", "Lebanon")
boats_lower <- tolower(boats)
journal_entry_ngram <- combined_journals %>%
unnest_tokens(bigram, journal_entry, token = "ngrams", n = 2)
journal_entry_ngram_lower <- journal_entry_ngram %>%
mutate(bigram = tolower(bigram))
bigram_counts_lower <- journal_entry_ngram_lower %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(
(word1 %in% boats_lower & !word2 %in% stop_words$word) |
(word2 %in% boats_lower & !word1 %in% stop_words$word)
) %>%
count(word1, word2, sort = TRUE)
boats_mentions_filtered <- bigram_counts_lower %>%
filter(word1 %in% boats_lower | word2 %in% boats_lower)
boats_freq <- bind_rows(
data.frame(word = boats_mentions_filtered$word1, freq = boats_mentions_filtered$n),
data.frame(word = boats_mentions_filtered$word2, freq = boats_mentions_filtered$n)
)
boats_freq <- boats_freq %>%
count(word)
boats_freq <- boats_freq %>%
filter(word %in% boats_lower)
boats_freq$word <- tools::toTitleCase(boats_freq$word)
wordcloud2(boats_freq, size = 1, color = "random-light", backgroundColor = "black", minSize = 1, minRotation = 0)